# STAT 202 Project
# K nearest neighbors
# Author: Fatih Sunor
#####################################################

# KNN
# knn() lives in the `class` package; load it explicitly so the script runs
# in a fresh session.
library(class)

# NOTE: the workspace is intentionally not cleared here; sourcing a script
# should not delete objects the caller already has.

# Read the training and test sets from the current working directory.
train <- read.csv("training.csv", header = TRUE)
test <- read.csv("test.csv", header = TRUE)

# Build the feature table used by k-NN from a raw data frame.
#
# data: data frame with at least 11 columns, laid out like training.csv.
# Returns a data frame with columns 6, 7 and 11 unchanged plus one derived
# column, log(sqrt(col9 * col10) + 1). Using one helper guarantees the
# training and test features are constructed identically.
buildFeatures <- function(data) {
  cbind(
    data[, 6:7],
    logGeoMean = log(sqrt(data[, 9] * data[, 10]) + 1),
    data[, 11, drop = FALSE]
  )
}

feature <- buildFeatures(train)
# Not used by the training-error sweep below; kept for test-set prediction.
testFeature <- buildFeatures(test)
# Class labels (column 13 of the training set) as a factor.
relevance <- as.factor(train[, 13])

# Candidate neighbourhood sizes and one error slot per candidate.
k <- seq(1, 10, by = 1)
trainErr <- numeric(length(k))

# K-NN: for each k, classify the training points themselves and record the
# misclassification rate. Predictions must be made on `feature` (not
# `testFeature`) because they are compared against the training labels.
for (i in seq_along(k)) {
  fitTrain <- knn(feature, feature, relevance, k = k[i])
  trainErr[i] <- mean(fitTrain != relevance)
}

# Plot training error as a function of k.
plot(
  k, trainErr,
  type = "l", col = "red", lwd = 2,
  main = "Classification Error vs k", xlab = "k", ylab = "Error",
  xlim = c(1, 10), ylim = c(0, 1)
)

# Flag true positives: cases where both the prediction and the true label
# equal 1.
#
# fit: predicted labels (scalar, vector or factor).
# rel: true labels, same length as `fit` (or length 1, which is recycled).
# Returns a logical vector with one element per case.
#
# Uses elementwise `&` rather than scalar `&&`: on vector input `&&` only
# inspected the first element in older R and is an error in R >= 4.3.
# For length-one inputs the result is unchanged.
truePositive <- function(fit, rel) {
  fit == 1 & rel == 1
}